import csv
import logging
import re
import time

import requests
from lxml import etree
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

# Root URL of the site being scraped (China meteorological science portal).
base_url = 'http://www.qxkp.net/'

# Desktop-browser User-Agent so plain HTTP requests are not rejected as a bot.
headers = {
    'User-Agent': ('Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 '
                   '(KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'),
}

def save_url_and_title(url, title):
    """Append one scraped (title, url) pair to 'url_and_title4.csv'.

    Uses the csv module so a title containing a comma or quote is properly
    quoted instead of silently corrupting the row, and writes UTF-8
    explicitly so Chinese titles survive on any platform's default encoding.

    :param url: absolute URL of the article.
    :param title: article title (may contain commas / CJK characters).
    """
    # Mirror each saved row into the log file configured by basicConfig.
    logging.info('%s,%s', title, url)
    # newline='' is required by the csv module; lineterminator keeps the
    # original '\n'-terminated rows.
    with open('url_and_title4.csv', 'a', newline='', encoding='utf-8') as f:
        csv.writer(f, lineterminator='\n').writerow([title, url])

def get_url(driver, page_url):
    """Crawl a listing section: save the featured article of every page,
    following the "next page" link until the last page.

    Iterates instead of recursing (the original recursed once per page, so a
    long pagination chain could raise RecursionError), and uses
    ``find_element(By.XPATH, ...)`` because ``find_element_by_xpath`` was
    removed in Selenium 4.

    :param driver: an already-started selenium WebDriver.
    :param page_url: URL of the first listing page of the section.
    """
    # Absolute XPaths taken from the site's fixed page layout.
    article_xpath = '/html/body/div[1]/div[4]/div[2]/div/div[2]/div/div[2]/div[1]/a'
    next_xpath = '/html/body/div[1]/div[4]/div[2]/div/div[3]/div[1]/a[3]'
    first_link_xpath = '/html/body/div[1]/div[4]/div[2]/div/div[3]/div[1]/a[1]'
    while page_url:
        driver.get(page_url)
        # One lookup serves both href and text (the original queried twice).
        link = driver.find_element(By.XPATH, article_xpath)
        article_url = link.get_attribute("href")
        title = link.text
        print(article_url, title)
        save_url_and_title(article_url, title)

        next_el = None
        try:
            next_el = driver.find_element(By.XPATH, next_xpath)
        except Exception as e:
            print(e)
        if next_el is None:
            # Short pagination bars have no a[3]; a[1] is then either the
            # "next" link or the "home" (首页) link marking the last page.
            try:
                next_el = driver.find_element(By.XPATH, first_link_xpath)
                if next_el.text == '首页':
                    print('最后一页')
                    return None
            except Exception as e:
                print(e)
        if next_el is None:
            return None
        page_url = next_el.get_attribute("href")
        print('下一页:', page_url)

# Log to a file alongside the CSV output.
logging.basicConfig(level=logging.INFO, filename='url_and_title4.log')

# Disaster-prevention sections to crawl (paths relative to base_url):
#   zhfy/ghzh/ - drought            zhfy/byhl/ - rainstorm & flood
#   zhfy/hczh/ - cold wave          zhfy/gwfh/ - high-temperature protection
#   zhfy/scb/  - sandstorm          zhfy/tffy/ - typhoon defense
#   zhfy/flaq/ - lightning safety   zhfy/fzjz/ - earthquake mitigation
list_url_path = [
    'zhfy/ghzh/', 'zhfy/byhl/', 'zhfy/hczh/', 'zhfy/gwfh/', 'zhfy/scb/',
    'zhfy/tffy/', 'zhfy/flaq/', 'zhfy/fzjz/',
]

# NOTE(review): this warm-up URL is a different section from the ones crawled
# below — presumably just to open the site first; confirm it is still needed.
first_url = 'http://www.qxkp.net/qxfw/shqx/'
ops = Options()
# ops.add_argument('--proxy-server=http://112.87.69.76:9999')
# `options=` replaces the deprecated `chrome_options=` keyword.
driver = webdriver.Chrome(executable_path='/home/ubuntu/桌面/ljz/dianping/chromedriver',
                          options=ops)
try:
    driver.get(first_url)
    time.sleep(2)
    for url_path in list_url_path:
        # base_url is defined once at the top of the file (the original
        # redundantly reassigned it here).
        get_url(driver, base_url + url_path)
        time.sleep(2)  # be polite between sections
finally:
    # Always close the browser, even if a section crashes mid-crawl.
    driver.quit()

